#ifndef SW5_INIT_H_
#define SW5_INIT_H_
#include "cell.h"
/*
 * One block of CELL_CAP vec<real> entries.
 * NOTE(review): the name and the `freps`/`nreps`/`first_frep` fields used
 * elsewhere in this header suggest a per-cell force replica; confirm against
 * the code that allocates and reduces these.
 */
typedef struct frep {
  vec<real> f[CELL_CAP]; /* CELL_CAP entries; presumably one per atom slot in a cell */
} frep_t;
/*
 * Parameter bundle for one pack/unpack operation.
 * NOTE(review): field meanings below are inferred from their names; confirm
 * against build_pack_params_sw() and the pack kernels.
 */
typedef struct vec_pack_param {
  cellgrid_t *grid;  /* grid the operation refers to */
  size_t total;      /* presumably the total packed size/count */
  char *buf;         /* raw byte buffer used for the transfer */
  /* Index bounds per axis (lo/hi); inclusive vs. exclusive is not visible here. */
  int xlo, xhi;
  int ylo, yhi;
  int zlo, zhi;
  size_t offset[64]; /* 64 offsets; presumably one per CPE */
} vec_pack_param_t;
/*
 * Identifiers for the pack/unpack variants, numbered consecutively from 0.
 * Each group holds the six axis directions (NEG/POS for X, Y, Z), so the
 * groups start at 0, 6, 12 and 18. N_PACK_DIRS is the number of variants (24).
 */
enum pack_directions {
  /* PACK_FWD_*: 0..5 */
  PACK_FWD_NEG_X = 0, PACK_FWD_POS_X,
  PACK_FWD_NEG_Y,     PACK_FWD_POS_Y,
  PACK_FWD_NEG_Z,     PACK_FWD_POS_Z,

  /* PACK_REV_*: 6..11 */
  PACK_REV_NEG_X,     PACK_REV_POS_X,
  PACK_REV_NEG_Y,     PACK_REV_POS_Y,
  PACK_REV_NEG_Z,     PACK_REV_POS_Z,

  /* UNPACK_FWD_*: 12..17 */
  UNPACK_FWD_NEG_X,   UNPACK_FWD_POS_X,
  UNPACK_FWD_NEG_Y,   UNPACK_FWD_POS_Y,
  UNPACK_FWD_NEG_Z,   UNPACK_FWD_POS_Z,

  /* UNPACK_REV_*: 18..23 */
  UNPACK_REV_NEG_X,   UNPACK_REV_POS_X,
  UNPACK_REV_NEG_Y,   UNPACK_REV_POS_Y,
  UNPACK_REV_NEG_Z,   UNPACK_REV_POS_Z,

  /* Count of the enumerators above; keep last. */
  N_PACK_DIRS
};
/*
 * Architecture-specific state kept for the Sunway port.
 * NOTE(review): the per-field notes are inferred from names only; confirm
 * against swinit()/do_lbal().
 */
typedef struct sw_archdata {
  frep_t *freps; /* array of frep_t blocks; length presumably nreps */
  struct {
    int st; /* range start */
    int ed; /* range end (inclusive vs. exclusive not visible here) */
  } pe_range[64]; /* 64 ranges; presumably one per CPE */
  long lbal_cnt;  /* presumably a load-balance counter */
  long last_lbal; /* presumably when load balancing last ran */
  int nreps;
  int bond_updated;
  vec_pack_param_t *pack_params; /* presumably filled by build_pack_params_sw() */
} sw_archdata_t;
#include "cell.h"
/*
 * Map local cell coordinates to a slot number modulo 64.
 *
 * The cell is linearised row-major over grid->nlocal (x slowest, z fastest)
 * and the remainder of that index by 64 is returned. With non-negative
 * coordinates the result is in [0, 63]; presumably it selects one of 64 CPEs.
 */
INLINE int getpe(cellgrid_t *grid, int x, int y, int z) {
  const int linear = z + grid->nlocal.z * (y + grid->nlocal.y * x);
  return linear % 64;
}
/*
 * Same as a row-major linear index modulo 64, but over the grid->nall extents
 * with every coordinate shifted by grid->nn first (so x, y, z may be as low
 * as -nn and still give a non-negative index).
 * NOTE(review): nn is presumably the ghost-layer width and nall the extents
 * including ghosts; confirm in cell.h.
 */
INLINE int getpe_all(cellgrid_t *grid, int x, int y, int z) {
  const int linear =
      grid->nn + z + grid->nall.z * (grid->nn + y + grid->nall.y * (grid->nn + x));
  return linear % 64;
}
/*
 * Row-major linear index of cell (x, y, z) over the grid->nlocal extents:
 * x is the slowest-varying coordinate, z the fastest.
 */
INLINE int getcid(cellgrid_t *grid, int x, int y, int z) {
  const int linear = z + grid->nlocal.z * (y + grid->nlocal.y * x);
  return linear;
}
/*
 * Row-major linear index of cell (x, y, z) over the grid->nall extents, with
 * each coordinate offset by grid->nn before linearisation (x slowest, z
 * fastest).
 * NOTE(review): nn is presumably the ghost-layer width; confirm in cell.h.
 */
INLINE int getcid_all(cellgrid_t *grid, int x, int y, int z) {
  const int linear =
      grid->nn + z + grid->nall.z * (grid->nn + y + grid->nall.y * (grid->nn + x));
  return linear;
}
/*
 * Per-cell bookkeeping record.
 * NOTE(review): the per-field notes are inferred from names only; confirm
 * against the code that fills this struct.
 */
typedef struct cellmeta {
  vec<real> basis;
  int natom;          /* presumably the number of atoms in the cell */
  int first_frep;     /* presumably an index into the frep_t array */
  long pe_mask;       /* presumably a bit mask over the 64 CPEs */
  long rep_init_mask;
  int nexport;
  int nguest;
  /* Disabled fields, kept for reference: */
  /* long bfilter[4]; */
  /* int nrguest; */
  // int nbonded_export, nchain2_export, nexcl_export, nscal_export, nimpr_export;
  // void *pack_st;
} cellmeta_t;
/* typedef struct topmeta { */
/*   int nbond, nangle, ntori, nimpr; */
/* } topmeta_t; */
// typedef struct exportmeta {
  /* Counter that get_next_cell_dyn() updates through inline asm to hand out
   * cell ids; only declared here, the definition lives in another file.
   * The surrounding exportmeta struct is commented out, so this is a plain
   * file-scope declaration. */
  extern int *lbal_cnt;
// } exportmeta_t;
#ifdef __sw_slave__

/*
 * Loop header: visit the cells of (grid) in the index box
 * [dim.lo, dim.hi) on each axis whose getcid_all() index has its low six bits
 * equal to _MYID (presumably the id of the executing CPE), i.e. cells are
 * dealt out round-robin over 64 slots.
 *
 * i, j, k name the int loop variables the macro declares; cell names the
 * celldata_t pointer obtained from get_cell_xyz(). The body is skipped for a
 * cell when get_cell_xyz() returns NULL.
 *
 * Caveats:
 *  - the expansion ends in a bare `if`, so an `else` written directly after
 *    the loop body would bind to that `if`;
 *  - `break` in the body only leaves the innermost single-pass `cell` loop,
 *    so it moves on to the next k instead of ending the whole traversal.
 */
#define FOREACH_CELL_CPE_RR(grid, i, j, k, cell)                                          \
  for (int i = (grid)->dim.lo.x; i < (grid)->dim.hi.x; i++)                               \
    for (int j = (grid)->dim.lo.y; j < (grid)->dim.hi.y; j++)                             \
      for (int k = (grid)->dim.lo.z; k < (grid)->dim.hi.z; k++)                           \
        for (celldata_t *cell = get_cell_xyz((grid), i, j, k); cell != NULL; cell = NULL) \
          if ((getcid_all((grid), i, j, k) & 63) == _MYID)
/*
 * Loop over local cells in the current process (without ghosts), restricted
 * to the cells whose getcid() index has its low six bits equal to _MYID
 * (presumably the id of the executing CPE), i.e. round-robin over 64 slots.
 *
 * i, j, k name the int loop variables the macro declares (each running over
 * [0, nlocal) on its axis); cell names the celldata_t pointer obtained from
 * get_cell_xyz(). The body is skipped for a cell when get_cell_xyz() returns
 * NULL.
 *
 * Caveats:
 *  - the expansion ends in a bare `if`, so an `else` written directly after
 *    the loop body would bind to that `if`;
 *  - `break` in the body only leaves the innermost single-pass `cell` loop,
 *    so it moves on to the next k instead of ending the whole traversal.
 */
#define FOREACH_LOCAL_CELL_CPE_RR(grid, i, j, k, cell)                                    \
  for (int i = 0; i < (grid)->nlocal.x; i++)                                              \
    for (int j = 0; j < (grid)->nlocal.y; j++)                                            \
      for (int k = 0; k < (grid)->nlocal.z; k++)                                          \
        for (celldata_t *cell = get_cell_xyz((grid), i, j, k); cell != NULL; cell = NULL) \
          if ((getcid((grid), i, j, k) & 63) == _MYID)
/*
 * Fetch the next cell id from the shared counter *lbal_cnt.
 *
 * The work is done by a single `faal` instruction with *lbal_cnt as a
 * read-write memory operand and the result register returned to the caller.
 * NOTE(review): `faal` is presumably the Sunway atomic fetch-and-add, making
 * this return the pre-increment counter value; confirm against the ISA
 * manual, and confirm that the operand width of `faal` matches the `int`
 * counter it is applied to.
 *
 * grid is not used by the body.
 */
inline int get_next_cell_dyn(cellgrid_t *grid) {
  int __cellid;
  /* volatile + "memory" clobber: keep the compiler from reordering or caching
   * memory accesses around the counter update. */
  asm volatile("faal %0, %1" : "=r"(__cellid), "+m"(*lbal_cnt) :: "memory");
  return __cellid;
}
/*
 * Loop header: visit local cells handed out dynamically by
 * get_next_cell_dyn(). The outer loop keeps requesting ids until one is
 * >= vecvol((grid)->nlocal).
 *
 * Each id is decoded row-major over nlocal (x slowest, z fastest) into i, j, k.
 * The i/j/k/cell loops are single-pass: they exist only to declare the
 * variables. Each runs once as long as its start value is >= -MAX_NN, then
 * sets its variable to -MAX_NN - 1 to terminate. The body is skipped when
 * get_cell_xyz() returns NULL.
 *
 * Caveats:
 *  - the macro introduces a variable with the fixed name `cellid`, which is
 *    visible in (and can clash with names used by) the loop body;
 *  - `break` in the body only leaves the innermost single-pass loop, so it
 *    proceeds to the next requested cell instead of ending the traversal.
 */
#define FOREACH_LOCAL_CELL_CPE_DYN(grid, i, j, k, cell) \
  for (int cellid = get_next_cell_dyn((grid)); cellid < vecvol((grid)->nlocal); cellid = get_next_cell_dyn((grid)))\
    for (int i = cellid / ((grid)->nlocal.y * (grid)->nlocal.z); i >= -MAX_NN; i = -MAX_NN - 1)\
    for (int j = cellid / (grid)->nlocal.z % (grid)->nlocal.y; j >= -MAX_NN; j = -MAX_NN - 1)\
    for (int k = cellid % (grid)->nlocal.z; k >= -MAX_NN; k = -MAX_NN - 1)\
    for (celldata_t *cell = get_cell_xyz((grid), i, j, k); cell != NULL; cell = NULL)

#endif
// >>> def getmk(c):
// ...     k = 0
// ...     while (1 << k) < c:
// ...         k += 1
// ...     k -= 1
// ...     m = (1 << (k + 32)) // c
// ...     return m, k
// ... 
/*
 * Multiplier/shift pair that replaces an integer division by a fixed divisor:
 * the quotient is computed as (value * mul) >> shift.
 */
typedef struct div_magic {
  long mul;   /* multiplier */
  long shift; /* right-shift applied to the product */
} div_magic_t;
/*
 * Fill *magic with the multiplier and shift for dividing by `divisor`.
 *
 * With k the smallest non-negative integer such that 2^(k + 1) >= divisor,
 * this stores shift = k + 32 and mul = ceil(2^(k + 32) / divisor).
 *
 * divisor must be positive: 0 divides by zero here, and negative values do
 * not produce a usable pair. Assumes `long` is wide enough to hold
 * 2^(k + 32).
 */
INLINE void make_magic(div_magic_t *magic, int divisor) {
  int bits = 0;
  for (; (1L << (bits + 1)) < divisor; ++bits) {
    /* search only; nothing to do per step */
  }

  const int total_shift = bits + 32;
  const long scaled_one = 1L << total_shift;

  magic->shift = total_shift;
  /* Adding divisor - 1 before dividing rounds the quotient up. */
  magic->mul = (scaled_one + divisor - 1) / divisor;
}
/*
 * Evaluate (divee * magic.mul) >> magic.shift, i.e. the multiply-and-shift
 * form of a division using a div_magic_t-style pair. `magic` is an object
 * (not a pointer) and is evaluated twice.
 */
#define MAGIC_DIV(divee, magic) (((divee) * (magic).mul) >> (magic).shift)
// Function signatures. The definitions are not in this header; the notes
// below are inferred from names only and should be confirmed against them.
// Presumably set-up for the Sunway-specific state of a grid.
void swinit(cellgrid_t *);
// Presumably the tear-down counterpart of swinit().
void swfinal();
// Presumably runs load balancing for the grid.
void do_lbal(cellgrid_t *);
// Presumably fills archdata->pack_params for the given grid.
void build_pack_params_sw(sw_archdata_t *archdata, cellgrid_t *);
// Verlet integration steps; the meaning of the real arguments and of the int
// return value is not visible here.
int initial_integrate_verlet_sw(cellgrid_t *, real, real);
void final_integrate_verlet_sw(cellgrid_t *, real);
// End of function signatures.
// Declared here but not defined in this project header; presumably provided
// by the athread runtime.
extern unsigned long athread_idle();


#endif
